


****************************

clear all
set maxvar 11000


do "Code/read_in_analysis_data.do"

***************************
* 


* Does the comparison of treatment and control for various variables:
* log1_ is the main specification:

foreach prefix in log1_ /* logN_ isnh_ none */  {
	
	* Sets up variables:
	local pref `prefix'
	if "`prefix'"=="none" local pref
	
	local leak_bas `pref'Tot_emiss_Leak1
	local leak_end `pref'Tot_emiss_Leak5
	local emis_bas `pref'Emissionskgd_1
	local emis_end `pref'Emissionskgd_5
	
	* sets up widtsh:
	if "`prefix'"=="log1_" local bwidth 1
	if "`prefix'"=="logN_" local bwidth 1
	if "`prefix'"=="isnh_" local bwidth 1
	if "`prefix'"=="none"  local bwidth 5
	
	* sets up titles:
	if "`prefix'"=="log1_" {
		local xtitle_leak "log(1 + baseline leakage)"
		local xtitle_emis "log(1 + baseline emissions)"
		local ytitle_leak "log(1 + endline leakage)"
		local ytitle_emis "log(1 + endline emissions)"
	}
	if "`prefix'"=="logN_" {
		local xtitle_leak "log(baseline leakage)"
		local ytitle_leak "log(endline leakage)"
		local ytitle_emis "log(endline emissions)"
	}
	if "`prefix'"=="isnh_" {
		local xtitle_leak "sinh{superscript:-1}(baseline leakage)"
		local xtitle_emis "sinh{superscript:-1}(baseline emissions)"
		local ytitle_leak "sinh{superscript:-1}(endline leakage)"
		local ytitle_emis "sinh{superscript:-1}(endline emissions)"
	}
	if "`prefix'"=="none" {
		local xtitle_leak "baseline leakage"
		local xtitle_emis "baseline emissions"
		local ytitle_leak "endline leakage"
		local ytitle_emis "endline emissions"
	}
	local if_clause_emiss
	local if_clause_leak
	if "`prefix'"=="none" {
		qui sum Tot_emiss_Leak1, detail
		local p75_leak = r(p75)
		local if_clause_leak & Tot_emiss_Leak1 <  `p75_leak'
		di "`if_clause_leak'"
		qui sum Emissionskgd_1, detail
		local p75_emiss = r(p75)
		local if_clause_emiss & Emissionskgd_1 <= `p75_emiss'
		di "`if_clause_emiss'"
	}
	
	* does graphs:
	* leakage regressed on leakage
	#delimit ;
	twoway lpoly `leak_end' `leak_bas' if Treatment_Ind==1 `if_clause_leak', bwidth(`bwidth') lpattern(dash) lcolor(black) || 
	       lpoly `leak_end' `leak_bas' if Treatment_Ind==0 `if_clause_leak', bwidth(`bwidth') lpattern(solid) lcolor(black)
		   legend(label(1 "Treatment") label(2 "Control") position(6) ring(3) row(1) region(lcolor(black)))
		   xtitle("`xtitle_leak'")
		   ytitle("`ytitle_leak'")
		   graphregion(color(white)) bgcolor(white)
		   ;
	#delimit cr
	if "`prefix'" == "log1_" graph export "Paper/Figures/lpoly_log1_leakage_on_log1_leakage.pdf", replace
	if "`prefix'" == "logN_" graph export "Paper/Figures/lpoly_logN_leakage_on_logN_leakage.pdf", replace
	if "`prefix'" == "isnh_" graph export "Paper/Figures/lpoly_isnh_leakage_on_isnh_leakage.pdf", replace
	if "`prefix'" == "none"  graph export "Paper/Figures/lpoly_raw_leakage_on_raw_leakage_ltp75.pdf", replace
	
	* emissions regressed on leakage
	#delimit ;
	twoway lpoly `emis_end' `leak_bas' if Treatment_Ind==1 `if_clause_leak', bwidth(`bwidth') lpattern(dash) lcolor(black) || 
	       lpoly `emis_end' `leak_bas' if Treatment_Ind==0 `if_clause_leak', bwidth(`bwidth') lpattern(solid) lcolor(black)
		   legend(label(1 "Treatment") label(2 "Control") position(6) ring(3) row(1) region(lcolor(black)))
		   xtitle("`xtitle_leak'")
		   ytitle("`ytitle_emis'")
		   graphregion(color(white)) bgcolor(white)
		   ;
	#delimit cr
	if "`prefix'" == "log1_" graph export "Paper/Figures/lpoly_log1_emissions_on_log1_leakage.pdf", replace
	if "`prefix'" == "logN_" graph export "Paper/Figures/lpoly_logN_emissions_on_logN_leakage.pdf", replace
	if "`prefix'" == "isnh_" graph export "Paper/Figures/lpoly_isnh_emissions_on_isnh_leakage.pdf", replace
	if "`prefix'" == "none"  graph export "Paper/Figures/lpoly_raw_emissions_on_raw_leakage_ltp75.pdf", replace
	
	
}



* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 


* Creates a running variable of baseline leakage for predicting at:
qui sum log1_Tot_emiss_Leak1
local leak1_max = floor(r(max)*10)

	egen predict_at = fill(0(1)`leak1_max')
	replace predict_at = . if predict_at > `leak1_max'
	replace predict_at = predict_at / 10
	replace predict_at = . if predict_at >= (`leak1_max' + 0.5)/10
	count if ~missing(predict_at)
	local count_predict_at = r(N)
	
	mkmat predict_at if ~missing(predict_at), matrix(predict_at)

* same for baseline leakage:
qui sum log1_Emissionskgd_1 
local emiss1_max = floor(r(max)*10)

	egen emiss_predict_at = fill(0(1)`emiss1_max')
	replace emiss_predict_at = . if emiss_predict_at > `emiss1_max'
	replace emiss_predict_at = emiss_predict_at / 10
	replace emiss_predict_at = . if emiss_predict_at >= (`emiss1_max' + 0.5)/10
	count if ~missing(emiss_predict_at)
	local count_emiss_predict_at = r(N)
	
	mkmat emiss_predict_at if ~missing(emiss_predict_at), matrix(emiss_predict_at)


local max_count_predict_at = max(`count_predict_at', `count_emiss_predict_at')	
	
	
	
	
* Creates predictions using the same lpoly specification as above for lpoly_log1_leakage_on_log1_leakage.pdf and lpoly_log1_leakage_on_log1_leakage.pdf
lpoly log1_Tot_emiss_Leak5 log1_Tot_emiss_Leak1 if Treatment_Ind==1, bwidth(1) nograph gen(lpoly_pred_treat)   at(predict_at)
lpoly log1_Tot_emiss_Leak5 log1_Tot_emiss_Leak1 if Treatment_Ind==0, bwidth(1) nograph gen(lpoly_pred_control) at(predict_at)
lpoly log1_Emissionskgd_5 log1_Tot_emiss_Leak1 if Treatment_Ind==1, bwidth(1) nograph gen(lpoly_pred_treatE)   at(predict_at)
lpoly log1_Emissionskgd_5 log1_Tot_emiss_Leak1 if Treatment_Ind==0, bwidth(1) nograph gen(lpoly_pred_controlE) at(predict_at)
lpoly log1_Emissionskgd_5 log1_Emissionskgd_1 if Treatment_Ind==1, bwidth(1) nograph gen(lpoly_pred_treatEE)   at(emiss_predict_at)
lpoly log1_Emissionskgd_5 log1_Emissionskgd_1 if Treatment_Ind==0, bwidth(1) nograph gen(lpoly_pred_controlEE) at(emiss_predict_at)


* Estimated treatment effect:
gen lpoly_pred_dif = lpoly_pred_treat - lpoly_pred_control
gen lpoly_pred_difE = lpoly_pred_treatE - lpoly_pred_controlE
gen lpoly_pred_difEE = lpoly_pred_treatEE - lpoly_pred_controlEE

* Gets the min of the max of treatment and control for later:
qui sum log1_Tot_emiss_Leak1 if Treatment_Ind==0
local max_control = r(max)
qui sum log1_Tot_emiss_Leak1 if Treatment_Ind==1
local max_treat = r(max)
local lpoly_pred_dif_minmax = min(`max_control', `max_treat')
local lpoly_pred_dif_maxmax = max(`max_control', `max_treat') 

qui sum log1_Tot_emiss_Leak1 if Treatment_Ind==0
local max_control = r(max)
qui sum log1_Tot_emiss_Leak1 if Treatment_Ind==1
local max_treat = r(max)
local lpoly_pred_difE_minmax = min(`max_control', `max_treat')
local lpoly_pred_difE_maxmax = max(`max_control', `max_treat')

qui sum log1_Emissionskgd_1 if Treatment_Ind==0
local max_control = r(max)
qui sum log1_Emissionskgd_1 if Treatment_Ind==1
local max_treat = r(max)
local lpoly_pred_difEE_minmax = min(`max_control', `max_treat')
local lpoly_pred_difEE_maxmax = max(`max_control', `max_treat')

* keeps these as a separate dataframe:
preserve
keep predict_at lpoly_pred_dif lpoly_pred_difE 
drop if missing(predict_at)
tempfile lpoly_pred_dif
save "`lpoly_pred_dif'"
restore

* keeps these as a separate dataframe:
preserve
keep emiss_predict_at lpoly_pred_difEE
drop if missing(emiss_predict_at)
tempfile lpoly_emiss_pred_dif
save "`lpoly_emiss_pred_dif'"
restore


* Preps for bootstrapping:	

set seed 100
tempfile temp
save "`temp'", replace

local bs_reps_lpoly = bs_reps_lpoly


* bootstraps:
forvalues i=1/`bs_reps_lpoly' {
	
	use "`temp'", clear
	
	di "on iteration `i'"
	
	* Drops because not consistent. Will 
	drop predict_at emiss_predict_at

	* bootstrap draw of data:
	bsample, cluster(Operator_1)
	qui count
	if r(N)<`max_count_predict_at' set obs `max_count_predict_at'
	
	count
	
	* Creates the "at" variables
	qui egen predict_at = fill(0(1)`leak1_max')
	qui replace predict_at = . if predict_at > `leak1_max'
	qui replace predict_at = predict_at / 10
	qui replace predict_at = . if predict_at >= (`leak1_max' + 0.5)/10

	qui egen emiss_predict_at = fill(0(1)`emiss1_max')
	qui replace emiss_predict_at = . if emiss_predict_at > `emiss1_max'
	qui replace emiss_predict_at = emiss_predict_at / 10
	qui replace emiss_predict_at = . if emiss_predict_at >= (`emiss1_max' + 0.5)/10
	
	* Treatment group:
	lpoly log1_Tot_emiss_Leak5 log1_Tot_emiss_Leak1 if Treatment_Ind==1, bwidth(1) at(predict_at) gen(pred_treat_) nograph
	if `i'==1 mkmat pred_treat_ if ~missing(predict_at), matrix(pred_treat_)
	if `i'>1 {
		mkmat pred_treat_ if ~missing(predict_at), matrix(pred_treat_i)
		matrix pred_treat_ = [pred_treat_, pred_treat_i]
	}
	
	* Control group:
	lpoly log1_Tot_emiss_Leak5 log1_Tot_emiss_Leak1 if Treatment_Ind==0, bwidth(1) at(predict_at) gen(pred_control_) nograph
	if `i'==1 mkmat pred_control_ if ~missing(predict_at), matrix(pred_control_)
	if `i'>1 {
		mkmat pred_control_ if ~missing(predict_at), matrix(pred_control_i)
		matrix pred_control_ = [pred_control_, pred_control_i]
	}
	
	* Same thing except for log total emissions as outcome:
	* reatment group:
	lpoly log1_Emissionskgd_5 log1_Tot_emiss_Leak1 if Treatment_Ind==1, bwidth(1) at(predict_at) gen(pred_treatE_) nograph
	if `i'==1 mkmat pred_treatE_ if ~missing(predict_at), matrix(pred_treatE_)
	if `i'>1 {
		mkmat pred_treatE_ if ~missing(predict_at), matrix(pred_treatE_i)
		matrix pred_treatE_ = [pred_treatE_, pred_treatE_i]
	}
	
	* Control group:
	lpoly log1_Emissionskgd_5 log1_Tot_emiss_Leak1 if Treatment_Ind==0, bwidth(1) at(predict_at) gen(pred_controlE_) nograph
	if `i'==1 mkmat pred_controlE_ if ~missing(predict_at), matrix(pred_controlE_)
	if `i'>1 {
		mkmat pred_controlE_ if ~missing(predict_at), matrix(pred_controlE_i)
		matrix pred_controlE_ = [pred_controlE_, pred_controlE_i]
	}
	
	* Same thing except for log total emissions as both running variable and outcome:
	* reatment group:
	lpoly log1_Emissionskgd_5 log1_Emissionskgd_1 if Treatment_Ind==1, bwidth(1) at(emiss_predict_at) gen(pred_treatEE_) nograph
	if `i'==1 mkmat pred_treatEE_ if ~missing(emiss_predict_at), matrix(pred_treatEE_)
	if `i'>1 {
		mkmat pred_treatEE_ if ~missing(emiss_predict_at), matrix(pred_treatEE_i)
		matrix pred_treatEE_ = [pred_treatEE_, pred_treatEE_i]
	}
	
	* Control group:
	lpoly log1_Emissionskgd_5 log1_Emissionskgd_1 if Treatment_Ind==0, bwidth(1) at(emiss_predict_at) gen(pred_controlEE_) nograph
	if `i'==1 mkmat pred_controlEE_ if ~missing(emiss_predict_at), matrix(pred_controlEE_)
	if `i'>1 {
		mkmat pred_controlEE_ if ~missing(emiss_predict_at), matrix(pred_controlEE_i)
		matrix pred_controlEE_ = [pred_controlEE_, pred_controlEE_i]
	}

	
	
	* Clears
	clear
	
}


* Turns these predictions back into data
svmat predict_at
rename predict_at1 predict_at
svmat pred_control_
svmat pred_treat_
svmat pred_controlE_
svmat pred_treatE_ 

svmat emiss_predict_at 
rename emiss_predict_at1 emiss_predict_at 
svmat pred_controlEE_
svmat pred_treatEE_


* Creates a difference variable
forvalues i=1/`bs_reps_lpoly' {
	gen pred_dif_`i' = pred_treat_`i' - pred_control_`i'
	gen pred_difE_`i' = pred_treatE_`i' - pred_controlE_`i'
	gen pred_difEE_`i' = pred_treatEE_`i' - pred_controlEE_`i'

}
tempfile pred_dif
save "`pred_dif'", replace


*****

* Reshapes to long for the case where log leakage is the running variable:
keep pred_treat_* pred_control_* pred_dif_* pred_treatE_* pred_controlE_* pred_difE_* predict_at
keep if ~missing(predict_at)
reshape long pred_treat_ pred_control_ pred_dif_ pred_treatE_ pred_controlE_ pred_difE_, i(predict_at) j(j)

rename pred_treat_ pred_treat 
rename pred_control_ pred_control
rename pred_dif_ pred_dif 
rename pred_treatE_ pred_treatE 
rename pred_controlE_ pred_controlE 
rename pred_difE_ pred_difE 



* Gets 95 percent confidence interval:
gen conf_int_95_ub = .
gen conf_int_95_lb = .

gen conf_int_95E_ub = .
gen conf_int_95E_lb = .

qui levelsof predict_at
foreach level in `r(levels)' {
	centile pred_dif if predict_at == `level', centile(2.5 97.5)
	qui replace conf_int_95_ub = r(c_2) if predict_at == `level'
	qui replace conf_int_95_lb = r(c_1) if predict_at == `level'

	centile pred_difE if predict_at == `level', centile(2.5 97.5)
	qui replace conf_int_95E_ub = r(c_2) if predict_at == `level'
	qui replace conf_int_95E_lb = r(c_1) if predict_at == `level'
	
} 

* Reshapes to wide:
reshape wide pred_treat pred_control pred_dif pred_treatE pred_controlE pred_difE, i(predict_at) j(j)

* saves:

* Merges in the lpoly_pred_dif :
merge 1:1 predict_at using "`lpoly_pred_dif'"

* Graphs the 95 percent confidence interval:
#delimit ;
twoway line lpoly_pred_dif predict_at if predict_at <= `lpoly_pred_dif_minmax', lpattern(solid) lcolor(black) ||
	   line conf_int_95_ub predict_at if predict_at <= `lpoly_pred_dif_minmax', lpattern(dash) lcolor(black) ||
       line conf_int_95_lb predict_at if predict_at <= `lpoly_pred_dif_minmax', lpattern(dash) lcolor(black)
	   ytitle("endline treatment - endline control")
	   xtitle("log(1 + baseline leakage)")
	   graphregion(color(white)) bgcolor(white)  xscale(range(0/`lpoly_pred_dif_maxmax'))
	   legend(label(1 "treatment minus control") label(2 "95% confidence intervals") order(1 2)  position(6) ring(3) row(1) region(lcolor(black)))
	   ;
		#delimit cr
graph export "Paper/Figures/lpoly_log1_leakage_on_log1_leakage_DIF.pdf", replace


#delimit ;
twoway line lpoly_pred_difE predict_at if predict_at <= `lpoly_pred_difE_minmax', lpattern(solid) lcolor(black) ||
	   line conf_int_95E_ub predict_at if predict_at <= `lpoly_pred_difE_minmax', lpattern(dash) lcolor(black) ||
       line conf_int_95E_lb predict_at if predict_at <= `lpoly_pred_difE_minmax', lpattern(dash) lcolor(black)
	   ytitle("endline treatment - endline control")
	   xtitle("log(1 + baseline leakage)")
	   graphregion(color(white)) bgcolor(white) xscale(range(0/`lpoly_pred_difE_maxmax'))
	   legend(label(1 "treatment minus control") label(2 "95% confidence intervals") order(1 2)  position(6) ring(3) row(1) region(lcolor(black)))
	   ;
		#delimit cr
graph export "Paper/Figures/lpoly_log1_emissions_on_log1_leakage_DIF.pdf", replace



